import scrapy
from tutorial.items import TvItem


class TvSpider(scrapy.Spider):
    """Spider that collects TV entries from the v.qq.com TV list pages.

    Crawling starts at the URL in ``start_urls``; ``parse`` emits one
    ``TvItem`` per listed title and follows the "next page" link until
    no such link is present.
    """

    name = "tv"
    allowed_domains = ["v.qq.com"]
    start_urls = [
        "http://v.qq.com/x/list/tv",
    ]

    def parse(self, response):
        """Extract TV items from a list page and follow pagination.

        Args:
            response: The downloaded list page.

        Yields:
            ``TvItem`` objects with ``tv_id``, ``tv_name`` and ``tv_attr``
            populated, followed by a ``scrapy.Request`` for the next list
            page when a next-page link exists.
        """
        for title_sel in response.xpath("//strong[@class='figure_title']"):
            tv_url = title_sel.xpath('a/@href').extract_first()
            if not tv_url:
                # Without a link there is nothing to derive the id from;
                # emitting the entry anyway would give every such item the
                # same meaningless id ('tvNone').
                self.logger.debug("Skipping title entry without a link")
                continue

            item = TvItem()
            # The id is 'tv' + the last path segment of the URL, cut at its
            # first dot.
            item['tv_id'] = 'tv' + tv_url.split('/')[-1].split('.')[0]
            item['tv_name'] = title_sel.xpath('a/@title').extract_first()
            item['tv_attr'] = {'url': tv_url}
            yield item

        # Look for the next page.
        next_page = response.xpath(
            "//div[@class='mod_pages']/a[@class='page_next']/@href"
        ).extract_first()
        if next_page is not None:
            # The href may be relative; resolve it against the current page.
            yield scrapy.Request(response.urljoin(next_page),
                                 callback=self.parse)
